{
  "cells": [
    {
      "cell_type": "markdown",
      "id": "7e4c9601-fcb8-49e6-ae48-4de8b6cac494",
      "metadata": {
        "id": "7e4c9601-fcb8-49e6-ae48-4de8b6cac494"
      },
      "source": [
        "<div>\n",
        "    <h1 style=\"text-align: center;\">Large Language Models Projects</h1>\n",
        "    <h3>Apply and Implement Strategies for Large Language Models</h3>\n",
        "    <h2>2_2-ChromaDB Server mode</h2>\n",
        "    \n",
        "</div>\n",
        "\n",
        "by [Pere Martra](https://www.linkedin.com/in/pere-martra/)\n",
        "_________\n",
        "This notebook is set up to run in a local environment. If executed in Google Colab, access to the ChromaDB server that will be created won't be possible.\n",
        "\n",
        "The code is the same as the one used in notebook 2_1_Vector_Databases_LLMs.ipynb.\n",
        "The client has been implemented in the notebook 2_3-ChromaDB Client.\n",
        "__________"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "fbdcd16f-1aca-442b-8d84-3fafdba82dd2",
      "metadata": {
        "id": "fbdcd16f-1aca-442b-8d84-3fafdba82dd2",
        "outputId": "15a5c4e0-ba39-41f0-f9fc-7245b1f98919"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Collecting chromadb==0.4.20\n",
            "  Downloading chromadb-0.4.20-py3-none-any.whl.metadata (7.3 kB)\n",
            "Requirement already satisfied: requests>=2.28 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from chromadb==0.4.20) (2.31.0)\n",
            "Requirement already satisfied: pydantic>=1.9 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from chromadb==0.4.20) (2.4.2)\n",
            "Collecting chroma-hnswlib==0.7.3 (from chromadb==0.4.20)\n",
            "  Downloading chroma_hnswlib-0.7.3-cp310-cp310-macosx_11_0_arm64.whl.metadata (252 bytes)\n",
            "Collecting fastapi>=0.95.2 (from chromadb==0.4.20)\n",
            "  Downloading fastapi-0.105.0-py3-none-any.whl.metadata (24 kB)\n",
            "Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb==0.4.20)\n",
            "  Downloading uvicorn-0.25.0-py3-none-any.whl.metadata (6.4 kB)\n",
            "Requirement already satisfied: numpy>=1.22.5 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from chromadb==0.4.20) (1.23.2)\n",
            "Collecting posthog>=2.4.0 (from chromadb==0.4.20)\n",
            "  Downloading posthog-3.1.0-py2.py3-none-any.whl.metadata (2.0 kB)\n",
            "Requirement already satisfied: typing-extensions>=4.5.0 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from chromadb==0.4.20) (4.8.0)\n",
            "Collecting pulsar-client>=3.1.0 (from chromadb==0.4.20)\n",
            "  Downloading pulsar_client-3.3.0-cp310-cp310-macosx_10_15_universal2.whl.metadata (1.0 kB)\n",
            "Collecting onnxruntime>=1.14.1 (from chromadb==0.4.20)\n",
            "  Downloading onnxruntime-1.16.3-cp310-cp310-macosx_11_0_arm64.whl.metadata (4.3 kB)\n",
            "Collecting opentelemetry-api>=1.2.0 (from chromadb==0.4.20)\n",
            "  Downloading opentelemetry_api-1.22.0-py3-none-any.whl.metadata (1.4 kB)\n",
            "Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb==0.4.20)\n",
            "  Downloading opentelemetry_exporter_otlp_proto_grpc-1.22.0-py3-none-any.whl.metadata (2.4 kB)\n",
            "Collecting opentelemetry-instrumentation-fastapi>=0.41b0 (from chromadb==0.4.20)\n",
            "  Downloading opentelemetry_instrumentation_fastapi-0.43b0-py3-none-any.whl.metadata (2.3 kB)\n",
            "Collecting opentelemetry-sdk>=1.2.0 (from chromadb==0.4.20)\n",
            "  Downloading opentelemetry_sdk-1.22.0-py3-none-any.whl.metadata (1.5 kB)\n",
            "Requirement already satisfied: tokenizers>=0.13.2 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from chromadb==0.4.20) (0.14.1)\n",
            "Collecting pypika>=0.48.9 (from chromadb==0.4.20)\n",
            "  Downloading PyPika-0.48.9.tar.gz (67 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.3/67.3 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Installing build dependencies ... \u001b[?25ldone\n",
            "\u001b[?25h  Getting requirements to build wheel ... \u001b[?25ldone\n",
            "\u001b[?25h  Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
            "\u001b[?25hRequirement already satisfied: tqdm>=4.65.0 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from chromadb==0.4.20) (4.66.1)\n",
            "Requirement already satisfied: overrides>=7.3.1 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from chromadb==0.4.20) (7.4.0)\n",
            "Requirement already satisfied: importlib-resources in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from chromadb==0.4.20) (6.1.0)\n",
            "Collecting grpcio>=1.58.0 (from chromadb==0.4.20)\n",
            "  Downloading grpcio-1.60.0-cp310-cp310-macosx_12_0_universal2.whl.metadata (4.0 kB)\n",
            "Collecting bcrypt>=4.0.1 (from chromadb==0.4.20)\n",
            "  Downloading bcrypt-4.1.2-cp39-abi3-macosx_10_12_universal2.whl.metadata (9.5 kB)\n",
            "Collecting typer>=0.9.0 (from chromadb==0.4.20)\n",
            "  Downloading typer-0.9.0-py3-none-any.whl (45 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.9/45.9 kB\u001b[0m \u001b[31m4.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting kubernetes>=28.1.0 (from chromadb==0.4.20)\n",
            "  Downloading kubernetes-28.1.0-py2.py3-none-any.whl.metadata (1.5 kB)\n",
            "Requirement already satisfied: tenacity>=8.2.3 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from chromadb==0.4.20) (8.2.3)\n",
            "Requirement already satisfied: PyYAML>=6.0.0 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from chromadb==0.4.20) (6.0.1)\n",
            "Collecting mmh3>=4.0.1 (from chromadb==0.4.20)\n",
            "  Downloading mmh3-4.0.1-cp310-cp310-macosx_11_0_arm64.whl.metadata (13 kB)\n",
            "Requirement already satisfied: anyio<4.0.0,>=3.7.1 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from fastapi>=0.95.2->chromadb==0.4.20) (3.7.1)\n",
            "Collecting starlette<0.28.0,>=0.27.0 (from fastapi>=0.95.2->chromadb==0.4.20)\n",
            "  Downloading starlette-0.27.0-py3-none-any.whl.metadata (5.8 kB)\n",
            "Requirement already satisfied: certifi>=14.05.14 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from kubernetes>=28.1.0->chromadb==0.4.20) (2023.7.22)\n",
            "Requirement already satisfied: six>=1.9.0 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from kubernetes>=28.1.0->chromadb==0.4.20) (1.16.0)\n",
            "Requirement already satisfied: python-dateutil>=2.5.3 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from kubernetes>=28.1.0->chromadb==0.4.20) (2.8.2)\n",
            "Requirement already satisfied: google-auth>=1.0.1 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from kubernetes>=28.1.0->chromadb==0.4.20) (2.23.3)\n",
            "Requirement already satisfied: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from kubernetes>=28.1.0->chromadb==0.4.20) (1.6.4)\n",
            "Requirement already satisfied: requests-oauthlib in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from kubernetes>=28.1.0->chromadb==0.4.20) (1.3.1)\n",
            "Requirement already satisfied: oauthlib>=3.2.2 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from kubernetes>=28.1.0->chromadb==0.4.20) (3.2.2)\n",
            "Collecting urllib3<2.0,>=1.24.2 (from kubernetes>=28.1.0->chromadb==0.4.20)\n",
            "  Downloading urllib3-1.26.18-py2.py3-none-any.whl.metadata (48 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m48.9/48.9 kB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting coloredlogs (from onnxruntime>=1.14.1->chromadb==0.4.20)\n",
            "  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: flatbuffers in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb==0.4.20) (23.5.26)\n",
            "Requirement already satisfied: packaging in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb==0.4.20) (23.2)\n",
            "Requirement already satisfied: protobuf in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb==0.4.20) (3.19.6)\n",
            "Requirement already satisfied: sympy in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb==0.4.20) (1.12)\n",
            "Collecting deprecated>=1.2.6 (from opentelemetry-api>=1.2.0->chromadb==0.4.20)\n",
            "  Downloading Deprecated-1.2.14-py2.py3-none-any.whl.metadata (5.4 kB)\n",
            "Requirement already satisfied: importlib-metadata<7.0,>=6.0 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from opentelemetry-api>=1.2.0->chromadb==0.4.20) (6.8.0)\n",
            "Collecting backoff<3.0.0,>=1.10.0 (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb==0.4.20)\n",
            "  Downloading backoff-2.2.1-py3-none-any.whl (15 kB)\n",
            "Collecting googleapis-common-protos~=1.52 (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb==0.4.20)\n",
            "  Downloading googleapis_common_protos-1.62.0-py2.py3-none-any.whl.metadata (1.5 kB)\n",
            "Collecting opentelemetry-exporter-otlp-proto-common==1.22.0 (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb==0.4.20)\n",
            "  Downloading opentelemetry_exporter_otlp_proto_common-1.22.0-py3-none-any.whl.metadata (1.9 kB)\n",
            "Collecting opentelemetry-proto==1.22.0 (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb==0.4.20)\n",
            "  Downloading opentelemetry_proto-1.22.0-py3-none-any.whl.metadata (2.3 kB)\n",
            "Collecting opentelemetry-instrumentation-asgi==0.43b0 (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb==0.4.20)\n",
            "  Downloading opentelemetry_instrumentation_asgi-0.43b0-py3-none-any.whl.metadata (2.1 kB)\n",
            "Collecting opentelemetry-instrumentation==0.43b0 (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb==0.4.20)\n",
            "  Downloading opentelemetry_instrumentation-0.43b0-py3-none-any.whl.metadata (5.9 kB)\n",
            "Collecting opentelemetry-semantic-conventions==0.43b0 (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb==0.4.20)\n",
            "  Downloading opentelemetry_semantic_conventions-0.43b0-py3-none-any.whl.metadata (2.3 kB)\n",
            "Collecting opentelemetry-util-http==0.43b0 (from opentelemetry-instrumentation-fastapi>=0.41b0->chromadb==0.4.20)\n",
            "  Downloading opentelemetry_util_http-0.43b0-py3-none-any.whl.metadata (2.5 kB)\n",
            "Requirement already satisfied: setuptools>=16.0 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from opentelemetry-instrumentation==0.43b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb==0.4.20) (68.2.2)\n",
            "Requirement already satisfied: wrapt<2.0.0,>=1.0.0 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from opentelemetry-instrumentation==0.43b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb==0.4.20) (1.15.0)\n",
            "Collecting asgiref~=3.0 (from opentelemetry-instrumentation-asgi==0.43b0->opentelemetry-instrumentation-fastapi>=0.41b0->chromadb==0.4.20)\n",
            "  Downloading asgiref-3.7.2-py3-none-any.whl.metadata (9.2 kB)\n",
            "Collecting monotonic>=1.5 (from posthog>=2.4.0->chromadb==0.4.20)\n",
            "  Downloading monotonic-1.6-py2.py3-none-any.whl (8.2 kB)\n",
            "Requirement already satisfied: annotated-types>=0.4.0 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from pydantic>=1.9->chromadb==0.4.20) (0.6.0)\n",
            "Requirement already satisfied: pydantic-core==2.10.1 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from pydantic>=1.9->chromadb==0.4.20) (2.10.1)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from requests>=2.28->chromadb==0.4.20) (3.3.0)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from requests>=2.28->chromadb==0.4.20) (3.4)\n",
            "Requirement already satisfied: huggingface_hub<0.18,>=0.16.4 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from tokenizers>=0.13.2->chromadb==0.4.20) (0.17.3)\n",
            "Requirement already satisfied: click<9.0.0,>=7.1.1 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from typer>=0.9.0->chromadb==0.4.20) (8.1.7)\n",
            "Requirement already satisfied: h11>=0.8 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from uvicorn>=0.18.3->uvicorn[standard]>=0.18.3->chromadb==0.4.20) (0.14.0)\n",
            "Collecting httptools>=0.5.0 (from uvicorn[standard]>=0.18.3->chromadb==0.4.20)\n",
            "  Downloading httptools-0.6.1-cp310-cp310-macosx_10_9_universal2.whl.metadata (3.6 kB)\n",
            "Collecting python-dotenv>=0.13 (from uvicorn[standard]>=0.18.3->chromadb==0.4.20)\n",
            "  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)\n",
            "Collecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn[standard]>=0.18.3->chromadb==0.4.20)\n",
            "  Downloading uvloop-0.19.0-cp310-cp310-macosx_10_9_universal2.whl.metadata (4.9 kB)\n",
            "Collecting watchfiles>=0.13 (from uvicorn[standard]>=0.18.3->chromadb==0.4.20)\n",
            "  Downloading watchfiles-0.21.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (4.9 kB)\n",
            "Collecting websockets>=10.4 (from uvicorn[standard]>=0.18.3->chromadb==0.4.20)\n",
            "  Downloading websockets-12.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (6.6 kB)\n",
            "Requirement already satisfied: sniffio>=1.1 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from anyio<4.0.0,>=3.7.1->fastapi>=0.95.2->chromadb==0.4.20) (1.3.0)\n",
            "Requirement already satisfied: exceptiongroup in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from anyio<4.0.0,>=3.7.1->fastapi>=0.95.2->chromadb==0.4.20) (1.1.3)\n",
            "Requirement already satisfied: cachetools<6.0,>=2.0.0 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb==0.4.20) (5.3.1)\n",
            "Requirement already satisfied: pyasn1-modules>=0.2.1 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb==0.4.20) (0.3.0)\n",
            "Requirement already satisfied: rsa<5,>=3.1.4 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from google-auth>=1.0.1->kubernetes>=28.1.0->chromadb==0.4.20) (4.9)\n",
            "Requirement already satisfied: filelock in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from huggingface_hub<0.18,>=0.16.4->tokenizers>=0.13.2->chromadb==0.4.20) (3.12.4)\n",
            "Requirement already satisfied: fsspec in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from huggingface_hub<0.18,>=0.16.4->tokenizers>=0.13.2->chromadb==0.4.20) (2023.6.0)\n",
            "Requirement already satisfied: zipp>=0.5 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from importlib-metadata<7.0,>=6.0->opentelemetry-api>=1.2.0->chromadb==0.4.20) (3.17.0)\n",
            "Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime>=1.14.1->chromadb==0.4.20)\n",
            "  Downloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hRequirement already satisfied: mpmath>=0.19 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from sympy->onnxruntime>=1.14.1->chromadb==0.4.20) (1.3.0)\n",
            "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in /Users/pere/miniforge3/envs/tf2023/lib/python3.10/site-packages (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes>=28.1.0->chromadb==0.4.20) (0.5.0)\n",
            "Downloading chromadb-0.4.20-py3-none-any.whl (507 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m507.7/507.7 kB\u001b[0m \u001b[31m10.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
            "\u001b[?25hDownloading chroma_hnswlib-0.7.3-cp310-cp310-macosx_11_0_arm64.whl (197 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m197.1/197.1 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading bcrypt-4.1.2-cp39-abi3-macosx_10_12_universal2.whl (528 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m528.5/528.5 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n",
            "\u001b[?25hDownloading fastapi-0.105.0-py3-none-any.whl (93 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m93.1/93.1 kB\u001b[0m \u001b[31m7.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading grpcio-1.60.0-cp310-cp310-macosx_12_0_universal2.whl (9.6 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.6/9.6 MB\u001b[0m \u001b[31m12.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
            "\u001b[?25hDownloading kubernetes-28.1.0-py2.py3-none-any.whl (1.6 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
            "\u001b[?25hDownloading mmh3-4.0.1-cp310-cp310-macosx_11_0_arm64.whl (35 kB)\n",
            "Downloading onnxruntime-1.16.3-cp310-cp310-macosx_11_0_arm64.whl (6.2 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m13.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
            "\u001b[?25hDownloading opentelemetry_api-1.22.0-py3-none-any.whl (57 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.9/57.9 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading opentelemetry_exporter_otlp_proto_grpc-1.22.0-py3-none-any.whl (18 kB)\n",
            "Downloading opentelemetry_exporter_otlp_proto_common-1.22.0-py3-none-any.whl (17 kB)\n",
            "Downloading opentelemetry_proto-1.22.0-py3-none-any.whl (50 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.8/50.8 kB\u001b[0m \u001b[31m4.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading opentelemetry_instrumentation_fastapi-0.43b0-py3-none-any.whl (11 kB)\n",
            "Downloading opentelemetry_instrumentation-0.43b0-py3-none-any.whl (28 kB)\n",
            "Downloading opentelemetry_instrumentation_asgi-0.43b0-py3-none-any.whl (14 kB)\n",
            "Downloading opentelemetry_semantic_conventions-0.43b0-py3-none-any.whl (36 kB)\n",
            "Downloading opentelemetry_util_http-0.43b0-py3-none-any.whl (6.9 kB)\n",
            "Downloading opentelemetry_sdk-1.22.0-py3-none-any.whl (105 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m105.6/105.6 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading posthog-3.1.0-py2.py3-none-any.whl (37 kB)\n",
            "Downloading pulsar_client-3.3.0-cp310-cp310-macosx_10_15_universal2.whl (10.9 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.9/10.9 MB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
            "\u001b[?25hDownloading uvicorn-0.25.0-py3-none-any.whl (60 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.3/60.3 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading Deprecated-1.2.14-py2.py3-none-any.whl (9.6 kB)\n",
            "Downloading googleapis_common_protos-1.62.0-py2.py3-none-any.whl (228 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m228.7/228.7 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading httptools-0.6.1-cp310-cp310-macosx_10_9_universal2.whl (149 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m149.8/149.8 kB\u001b[0m \u001b[31m10.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading starlette-0.27.0-py3-none-any.whl (66 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.0/67.0 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading urllib3-1.26.18-py2.py3-none-any.whl (143 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.8/143.8 kB\u001b[0m \u001b[31m10.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading uvloop-0.19.0-cp310-cp310-macosx_10_9_universal2.whl (1.4 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m14.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
            "\u001b[?25hDownloading watchfiles-0.21.0-cp310-cp310-macosx_11_0_arm64.whl (418 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m418.3/418.3 kB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading websockets-12.0-cp310-cp310-macosx_11_0_arm64.whl (121 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m121.3/121.3 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading asgiref-3.7.2-py3-none-any.whl (24 kB)\n",
            "Building wheels for collected packages: pypika\n",
            "  Building wheel for pypika (pyproject.toml) ... \u001b[?25ldone\n",
            "\u001b[?25h  Created wheel for pypika: filename=PyPika-0.48.9-py2.py3-none-any.whl size=53723 sha256=bbd24c13ce8b13ed4b60b22b80d877db0d4e9ffb31cfd6f3824f960f475fb663\n",
            "  Stored in directory: /Users/pere/Library/Caches/pip/wheels/e1/26/51/d0bffb3d2fd82256676d7ad3003faea3bd6dddc9577af665f4\n",
            "Successfully built pypika\n",
            "Installing collected packages: pypika, monotonic, mmh3, websockets, uvloop, uvicorn, urllib3, typer, python-dotenv, pulsar-client, opentelemetry-util-http, opentelemetry-semantic-conventions, opentelemetry-proto, humanfriendly, httptools, grpcio, googleapis-common-protos, deprecated, chroma-hnswlib, bcrypt, backoff, asgiref, watchfiles, starlette, opentelemetry-exporter-otlp-proto-common, opentelemetry-api, coloredlogs, posthog, opentelemetry-sdk, opentelemetry-instrumentation, onnxruntime, fastapi, opentelemetry-instrumentation-asgi, opentelemetry-exporter-otlp-proto-grpc, kubernetes, opentelemetry-instrumentation-fastapi, chromadb\n",
            "  Attempting uninstall: urllib3\n",
            "    Found existing installation: urllib3 2.0.6\n",
            "    Uninstalling urllib3-2.0.6:\n",
            "      Successfully uninstalled urllib3-2.0.6\n",
            "  Attempting uninstall: grpcio\n",
            "    Found existing installation: grpcio 1.46.3\n",
            "    Uninstalling grpcio-1.46.3:\n",
            "      Successfully uninstalled grpcio-1.46.3\n",
            "Successfully installed asgiref-3.7.2 backoff-2.2.1 bcrypt-4.1.2 chroma-hnswlib-0.7.3 chromadb-0.4.20 coloredlogs-15.0.1 deprecated-1.2.14 fastapi-0.105.0 googleapis-common-protos-1.62.0 grpcio-1.60.0 httptools-0.6.1 humanfriendly-10.0 kubernetes-28.1.0 mmh3-4.0.1 monotonic-1.6 onnxruntime-1.16.3 opentelemetry-api-1.22.0 opentelemetry-exporter-otlp-proto-common-1.22.0 opentelemetry-exporter-otlp-proto-grpc-1.22.0 opentelemetry-instrumentation-0.43b0 opentelemetry-instrumentation-asgi-0.43b0 opentelemetry-instrumentation-fastapi-0.43b0 opentelemetry-proto-1.22.0 opentelemetry-sdk-1.22.0 opentelemetry-semantic-conventions-0.43b0 opentelemetry-util-http-0.43b0 posthog-3.1.0 pulsar-client-3.3.0 pypika-0.48.9 python-dotenv-1.0.0 starlette-0.27.0 typer-0.9.0 urllib3-1.26.18 uvicorn-0.25.0 uvloop-0.19.0 watchfiles-0.21.0 websockets-12.0\n"
          ]
        }
      ],
      "source": [
        "# Use the %pip magic (instead of !pip) so the pinned package is installed\n",
        "# into the environment of the kernel that is running this notebook.\n",
        "%pip install chromadb==0.4.20"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "d6c765af-dc87-41e3-891d-a6e934a53259",
      "metadata": {
        "id": "d6c765af-dc87-41e3-891d-a6e934a53259"
      },
      "outputs": [],
      "source": [
        "import numpy as np\n",
        "import pandas as pd"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "8be6c63a-5077-4550-98f7-6e3e118872a5",
      "metadata": {
        "id": "8be6c63a-5077-4550-98f7-6e3e118872a5"
      },
      "outputs": [],
      "source": [
        "# Dataset: download and unzip it from Kaggle before running this cell:\n",
        "# https://www.kaggle.com/datasets/kotartemiy/topic-labeled-news-dataset\n",
        "\n",
        "# Adjust the path below so it points to where the extracted .csv file is stored.\n",
        "news = pd.read_csv(\n",
        "    \"./kaggle/labelled_newscatcher_dataset.csv\",\n",
        "    sep=\";\",\n",
        ")\n",
        "\n",
        "# Number of news items to keep, and the columns used as document text and topic label.\n",
        "MAX_NEWS = 1000\n",
        "DOCUMENT = \"title\"\n",
        "TOPIC = \"topic\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "6a2fd5a3-29a4-4d5b-9527-fd431671ed93",
      "metadata": {
        "id": "6a2fd5a3-29a4-4d5b-9527-fd431671ed93"
      },
      "outputs": [],
      "source": [
        "# This is only an example, so we keep just the first MAX_NEWS rows of the news.\n",
        "subset_news = news.iloc[:MAX_NEWS]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "5e7fc76a-c828-4db4-bada-b1e14c2eefea",
      "metadata": {
        "id": "5e7fc76a-c828-4db4-bada-b1e14c2eefea"
      },
      "outputs": [],
      "source": [
        "import chromadb"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "04b25934-62bd-4720-871a-7e516807c3e7",
      "metadata": {
        "id": "04b25934-62bd-4720-871a-7e516807c3e7"
      },
      "outputs": [],
      "source": [
        "# Create a persistent Chroma client that uses the local ./chromadb directory.\n",
        "chroma_client = chromadb.PersistentClient(path=\"./chromadb\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "06549d87-b66f-44b7-a1dc-b6c122685602",
      "metadata": {
        "id": "06549d87-b66f-44b7-a1dc-b6c122685602"
      },
      "outputs": [],
      "source": [
        "collection_name = \"local_news_collection\"\n",
        "\n",
        "# Remove a previous copy of the collection so this cell can be re-run safely.\n",
        "# Every existing collection is compared by name, not only the first one listed,\n",
        "# otherwise create_collection() below fails when the collection already exists\n",
        "# but is not the first entry returned by list_collections().\n",
        "if any(existing.name == collection_name for existing in chroma_client.list_collections()):\n",
        "    chroma_client.delete_collection(name=collection_name)\n",
        "\n",
        "collection = chroma_client.create_collection(name=collection_name)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "f8b6d5c4-0f6c-4bfd-9bab-6f0df0fa2469",
      "metadata": {
        "id": "f8b6d5c4-0f6c-4bfd-9bab-6f0df0fa2469",
        "outputId": "a281dc8b-4d31-4ecf-a50a-95cec127d7d6"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "/Users/pere/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx.tar.gz: 100%|█| 79.3\n"
          ]
        }
      ],
      "source": [
        "# Store the news titles as documents, with their topic as metadata.\n",
        "# The ids are generated from the real number of rows in subset_news, so the\n",
        "# documents, metadatas and ids lists always have the same length, even when\n",
        "# the CSV holds fewer than MAX_NEWS rows.\n",
        "collection.add(\n",
        "    documents=subset_news[DOCUMENT].tolist(),\n",
        "    metadatas=[{TOPIC: topic} for topic in subset_news[TOPIC].tolist()],\n",
        "    ids=[f\"id{x}\" for x in range(len(subset_news))],\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "9c9211e7-bd14-4cd0-b617-6b520de23e1b",
      "metadata": {
        "id": "9c9211e7-bd14-4cd0-b617-6b520de23e1b",
        "outputId": "2358d16f-6269-4aa9-de1a-5f6f4c15134d"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "{'ids': [['id173', 'id829', 'id117', 'id535', 'id141', 'id218', 'id390', 'id273', 'id56', 'id900']], 'distances': [[0.8593592047691345, 1.0294400453567505, 1.0793328285217285, 1.093001365661621, 1.1329681873321533, 1.2130439281463623, 1.2143322229385376, 1.2164145708084106, 1.222063660621643, 1.275417447090149]], 'metadatas': [[{'topic': 'TECHNOLOGY'}, {'topic': 'TECHNOLOGY'}, {'topic': 'TECHNOLOGY'}, {'topic': 'TECHNOLOGY'}, {'topic': 'TECHNOLOGY'}, {'topic': 'TECHNOLOGY'}, {'topic': 'TECHNOLOGY'}, {'topic': 'TECHNOLOGY'}, {'topic': 'TECHNOLOGY'}, {'topic': 'TECHNOLOGY'}]], 'embeddings': None, 'documents': [['The Legendary Toshiba is Officially Done With Making Laptops', '3 gaming laptop deals you can’t afford to miss today', 'Lenovo and HP control half of the global laptop market', 'Asus ROG Zephyrus G14 gaming laptop announced in India', 'Acer Swift 3 featuring a 10th-generation Intel Ice Lake CPU, 2K screen, and more launched in India for INR 64999 (US$865)', \"Apple's Next MacBook Could Be the Cheapest in Company's History\", \"Features of Huawei's Desktop Computer Revealed\", 'Redmi to launch its first gaming laptop on August 14: Here are all the details', 'Toshiba shuts the lid on laptops after 35 years', 'This is the cheapest Windows PC by a mile and it even has a spare SSD slot']], 'uris': None, 'data': None}\n"
          ]
        }
      ],
      "source": [
        "# Query the collection with the text \"laptop\" and request 10 results.\n",
        "results = collection.query(query_texts=[\"laptop\"], n_results=10 )\n",
        "\n",
        "# Print the raw result object returned by query().\n",
        "print(results)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "65a107f7-efbb-4a4a-8fe4-4137f00d925f",
      "metadata": {
        "id": "65a107f7-efbb-4a4a-8fe4-4137f00d925f",
        "outputId": "fdc6fa12-0122-45a1-e3d6-07c23e8f994d"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
            "To disable this warning, you can either:\n",
            "\t- Avoid using `tokenizers` before the fork if possible\n",
            "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "\u001b[1m\n",
            "\n",
            "                \u001b[38;5;069m(((((((((    \u001b[38;5;203m(((((\u001b[38;5;220m####\n",
            "             \u001b[38;5;069m(((((((((((((\u001b[38;5;203m(((((((((\u001b[38;5;220m#########\n",
            "           \u001b[38;5;069m(((((((((((((\u001b[38;5;203m(((((((((((\u001b[38;5;220m###########\n",
            "         \u001b[38;5;069m((((((((((((((\u001b[38;5;203m((((((((((((\u001b[38;5;220m############\n",
            "        \u001b[38;5;069m(((((((((((((\u001b[38;5;203m((((((((((((((\u001b[38;5;220m#############\n",
            "        \u001b[38;5;069m(((((((((((((\u001b[38;5;203m((((((((((((((\u001b[38;5;220m#############\n",
            "         \u001b[38;5;069m((((((((((((\u001b[38;5;203m(((((((((((((\u001b[38;5;220m##############\n",
            "         \u001b[38;5;069m((((((((((((\u001b[38;5;203m((((((((((((\u001b[38;5;220m##############\n",
            "           \u001b[38;5;069m((((((((((\u001b[38;5;203m(((((((((((\u001b[38;5;220m#############\n",
            "             \u001b[38;5;069m((((((((\u001b[38;5;203m((((((((\u001b[38;5;220m##############\n",
            "                \u001b[38;5;069m(((((\u001b[38;5;203m((((    \u001b[38;5;220m#########\u001b[0m\n",
            "\n",
            "    \n",
            "\u001b[1m\n",
            "Running Chroma\n",
            "\u001b[0m\n",
            "\u001b[1mSaving data to\u001b[0m: \u001b[32m./chromadb\u001b[0m\n",
            "\u001b[1mConnect to chroma at\u001b[0m: \u001b[32mhttp://localhost:8000\u001b[0m\n",
            "\u001b[1mGetting started guide\u001b[0m: https://docs.trychroma.com/getting-started\n",
            "\n",
            "\n",
            "\u001b[32mINFO\u001b[0m:     [24-12-2023 16:53:25] Set chroma_server_nofile to 65535\n",
            "\u001b[32mINFO\u001b[0m:     [24-12-2023 16:53:25] Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.\n",
            "\u001b[36mDEBUG\u001b[0m:    [24-12-2023 16:53:25] Starting component System\n",
            "\u001b[36mDEBUG\u001b[0m:    [24-12-2023 16:53:25] Starting component OpenTelemetryClient\n",
            "\u001b[36mDEBUG\u001b[0m:    [24-12-2023 16:53:25] Starting component SimpleAssignmentPolicy\n",
            "\u001b[36mDEBUG\u001b[0m:    [24-12-2023 16:53:25] Starting component SqliteDB\n",
            "\u001b[36mDEBUG\u001b[0m:    [24-12-2023 16:53:25] Starting component Posthog\n",
            "\u001b[36mDEBUG\u001b[0m:    [24-12-2023 16:53:25] Starting component LocalSegmentManager\n",
            "\u001b[36mDEBUG\u001b[0m:    [24-12-2023 16:53:25] Starting component SegmentAPI\n",
            "\u001b[32mINFO\u001b[0m:     [24-12-2023 16:53:25] Started server process [\u001b[36m7527\u001b[0m]\n",
            "\u001b[32mINFO\u001b[0m:     [24-12-2023 16:53:25] Waiting for application startup.\n",
            "\u001b[32mINFO\u001b[0m:     [24-12-2023 16:53:25] Application startup complete.\n",
            "\u001b[32mINFO\u001b[0m:     [24-12-2023 16:53:25] Uvicorn running on \u001b[1mhttp://localhost:8000\u001b[0m (Press CTRL+C to quit)\n",
            "\u001b[32mINFO\u001b[0m:     [24-12-2023 16:57:57] ::1:52448 - \"GET /api/v1/tenants/default_tenant HTTP/1.1\" 200\n",
            "\u001b[32mINFO\u001b[0m:     [24-12-2023 16:57:57] ::1:52448 - \"GET /api/v1/databases/default_database?tenant=default_tenant HTTP/1.1\" 200\n",
            "\u001b[32mINFO\u001b[0m:     [24-12-2023 17:06:21] ::1:52463 - \"GET /api/v1/collections/local_news_collection?tenant=default_tenant&database=default_database HTTP/1.1\" 200\n",
            "\u001b[36mDEBUG\u001b[0m:    [24-12-2023 17:06:22] Starting component PersistentLocalHnswSegment\n",
            "\u001b[32mINFO\u001b[0m:     [24-12-2023 17:06:22] ::1:52463 - \"POST /api/v1/collections/a37539aa-239b-44f5-8079-c36926d21419/query HTTP/1.1\" 200\n",
            "^C\n",
            "\u001b[32mINFO\u001b[0m:     [24-12-2023 17:56:12] Shutting down\n",
            "\u001b[32mINFO\u001b[0m:     [24-12-2023 17:56:12] Waiting for application shutdown.\n",
            "\u001b[32mINFO\u001b[0m:     [24-12-2023 17:56:12] Application shutdown complete.\n",
            "\u001b[32mINFO\u001b[0m:     [24-12-2023 17:56:12] Finished server process [\u001b[36m7527\u001b[0m]\n"
          ]
        }
      ],
      "source": [
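        "# Run Chroma in server mode. This cell blocks while the server is running; interrupt it (Ctrl+C / interrupt the kernel) to shut the server down.\n",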
        "!chroma run --path ./chromadb"
      ]
    },
    {
      "cell_type": "markdown",
      "id": "bcb943bf-a5b2-4bb3-a265-72e2633ab98a",
      "metadata": {
        "id": "bcb943bf-a5b2-4bb3-a265-72e2633ab98a"
      },
      "source": [
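        "The code to test this server is in the notebook 2_3-ChromaDB Client.ipynb. Keep the server cell above running while you execute the client notebook, since the client connects to http://localhost:8000."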
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "a9428dd9-39dd-4d96-9bdb-05b6c00cfe67",
      "metadata": {
        "id": "a9428dd9-39dd-4d96-9bdb-05b6c00cfe67"
      },
      "outputs": [],
      "source": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3 (ipykernel)",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.10.12"
    },
    "colab": {
      "provenance": []
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}